In [1]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"
import plotly.express as px

data = pd.read_csv("C:\\Users\\AASHIMA\\Downloads\\python\\Apple-Fitness-Data.csv")
In [2]:
data.head()
Out[2]:
Date Time Step Count Distance Energy Burned Flights Climbed Walking Double Support Percentage Walking Speed
0 2023-03-21 16:01:23 46 0.02543 14.620 3 0.304 3.060
1 2023-03-21 16:18:37 645 0.40041 14.722 3 0.309 3.852
2 2023-03-21 16:31:38 14 0.00996 14.603 4 0.278 3.996
3 2023-03-21 16:45:37 13 0.00901 14.811 3 0.278 5.040
4 2023-03-21 17:10:30 17 0.00904 15.153 3 0.281 5.184
In [3]:
print(data.isnull().sum())
Date                                 0
Time                                 0
Step Count                           0
Distance                             0
Energy Burned                        0
Flights Climbed                      0
Walking Double Support Percentage    0
Walking Speed                        0
dtype: int64
In [4]:
fig1=px.line(data,x="Time",y="Step Count",title="Step Count Over Time")
fig1.update_xaxes(rangeslider_visible=True)
fig1.show()

Now, let’s have a look at the distance covered over time:¶

In [5]:
fig2=px.line(data,x="Time",y="Distance",title="Distance Over Time")
fig2.update_xaxes(rangeslider_visible=True)
fig2.show()

Now, let’s have a look at my energy burned over time:¶

In [6]:
fig3=px.line(data,x="Time",y="Energy Burned",title="Energy Bunned Over Time")
fig3.update_xaxes(rangeslider_visible=True)
fig3.show()

Now, let’s have a look at my walking speed over time:¶

In [7]:
fig4=px.line(data,x="Time",y="Walking Speed",title="Step Count Over Time")
fig4.update_xaxes(rangeslider_visible=True)
fig4.show()

Now, let’s calculate and look at the average step counts per day:¶

In [8]:
average_step_count=data.groupby("Date")["Step Count"].mean().reset_index()
average_step_count
Out[8]:
Date Step Count
0 2023-03-21 137.636364
1 2023-03-22 354.233333
2 2023-03-23 109.125000
3 2023-03-24 64.666667
4 2023-03-25 117.000000
5 2023-03-26 101.000000
6 2023-03-27 48.875000
7 2023-03-28 163.750000
8 2023-03-29 169.578947
9 2023-03-30 384.181818
10 2023-03-31 372.166667
11 2023-04-01 306.916667
In [9]:
fig5=px.bar(average_step_count,x="Date",y="Step Count",title="Average Step Count per Day")
fig5.update_xaxes(rangeslider_visible=True)
fig5.show()

Now, let’s have a look at my walking efficiency over time:¶

In [10]:
data["Walking Efficiency"]=data["Distance"]/data["Step Count"]
fig1=px.line(data,x="Time",y="Walking Efficiency",title="Walking Efficiency Over Time")
fig1.update_xaxes(rangeslider_visible=True)
fig1.show()

Now, let’s have a look at the step count and walking speed variations by time intervals:¶

In [11]:
# Create Time Intervals
time_intervals = pd.cut(pd.to_datetime(data["Time"]).dt.hour,
                        bins=[0, 12, 18, 24],
                        labels=["Morning", "Afternoon", "Evening"], 
                        )
data["Time Interval"]=time_intervals
fig7=px.scatter(data,x="Step Count",y="Walking Speed",color=time_intervals,title="Step Count and Walking Speed Variations by Time Interval",
                  trendline='ols')
fig7.show()

Now, let’s compare the daily average of all the health and fitness metrics:¶

In [12]:
daily_avg_metrics=data.groupby("Date").mean().reset_index()

daily_avg_metrics_melted=daily_avg_metrics.melt(id_vars=["Date"],value_vars=["Step Count", "Distance", 
                                                              "Energy Burned", "Flights Climbed", 
                                                              "Walking Double Support Percentage", 
                                                              "Walking Speed"])
daily_avg_metrics_melted
C:\Users\AASHIMA\AppData\Local\Temp\ipykernel_4120\853594503.py:1: FutureWarning:

The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.

Out[12]:
Date variable value
0 2023-03-21 Step Count 137.636364
1 2023-03-22 Step Count 354.233333
2 2023-03-23 Step Count 109.125000
3 2023-03-24 Step Count 64.666667
4 2023-03-25 Step Count 117.000000
... ... ... ...
67 2023-03-28 Walking Speed 4.902000
68 2023-03-29 Walking Speed 4.234737
69 2023-03-30 Walking Speed 4.434545
70 2023-03-31 Walking Speed 3.468000
71 2023-04-01 Walking Speed 4.497000

72 rows × 3 columns

In [13]:
fig=px.treemap(daily_avg_metrics_melted,path=["variable"],values="value",color="variable",hover_data=
              ["value"],title="Daily Averages for Different Metrics")
fig.show()
In [14]:
metrics_to_visualize = ["Distance", "Energy Burned", "Flights Climbed", 
                        "Walking Double Support Percentage", "Walking Speed"]

# Reshape data for treemap
daily_avg_metrics_melted = daily_avg_metrics.melt(id_vars=["Date"], value_vars=metrics_to_visualize)

fig = px.treemap(daily_avg_metrics_melted,
                 path=["variable"],
                 values="value",
                 color="variable",
                 hover_data=["value"],
                 title="Daily Averages for Different Metrics (Excluding Step Count)")
fig.show()
In [15]:
daily_avg_metrics_melted
Out[15]:
Date variable value
0 2023-03-21 Distance 0.086225
1 2023-03-22 Distance 0.230261
2 2023-03-23 Distance 0.075796
3 2023-03-24 Distance 0.042067
4 2023-03-25 Distance 0.080747
5 2023-03-26 Distance 0.068760
6 2023-03-27 Distance 0.032664
7 2023-03-28 Distance 0.102727
8 2023-03-29 Distance 0.115884
9 2023-03-30 Distance 0.252494
10 2023-03-31 Distance 0.245360
11 2023-04-01 Distance 0.202566
12 2023-03-21 Energy Burned 14.721273
13 2023-03-22 Energy Burned 15.158233
14 2023-03-23 Energy Burned 14.303000
15 2023-03-24 Energy Burned 15.268667
16 2023-03-25 Energy Burned 15.060222
17 2023-03-26 Energy Burned 18.504091
18 2023-03-27 Energy Burned 23.656625
19 2023-03-28 Energy Burned 14.853917
20 2023-03-29 Energy Burned 13.363737
21 2023-03-30 Energy Burned 13.236909
22 2023-03-31 Energy Burned 11.042167
23 2023-04-01 Energy Burned 8.238750
24 2023-03-21 Flights Climbed 2.909091
25 2023-03-22 Flights Climbed 2.466667
26 2023-03-23 Flights Climbed 2.375000
27 2023-03-24 Flights Climbed 2.666667
28 2023-03-25 Flights Climbed 2.555556
29 2023-03-26 Flights Climbed 2.000000
30 2023-03-27 Flights Climbed 2.000000
31 2023-03-28 Flights Climbed 4.000000
32 2023-03-29 Flights Climbed 1.684211
33 2023-03-30 Flights Climbed 2.545455
34 2023-03-31 Flights Climbed 2.500000
35 2023-04-01 Flights Climbed 2.250000
36 2023-03-21 Walking Double Support Percentage 0.294273
37 2023-03-22 Walking Double Support Percentage 0.310467
38 2023-03-23 Walking Double Support Percentage 0.312375
39 2023-03-24 Walking Double Support Percentage 0.307333
40 2023-03-25 Walking Double Support Percentage 0.297778
41 2023-03-26 Walking Double Support Percentage 0.291000
42 2023-03-27 Walking Double Support Percentage 0.284625
43 2023-03-28 Walking Double Support Percentage 0.300417
44 2023-03-29 Walking Double Support Percentage 0.298842
45 2023-03-30 Walking Double Support Percentage 0.293182
46 2023-03-31 Walking Double Support Percentage 0.279583
47 2023-04-01 Walking Double Support Percentage 0.296417
48 2023-03-21 Walking Speed 4.352727
49 2023-03-22 Walking Speed 3.502800
50 2023-03-23 Walking Speed 3.762000
51 2023-03-24 Walking Speed 3.936000
52 2023-03-25 Walking Speed 3.520000
53 2023-03-26 Walking Speed 3.135273
54 2023-03-27 Walking Speed 4.450500
55 2023-03-28 Walking Speed 4.902000
56 2023-03-29 Walking Speed 4.234737
57 2023-03-30 Walking Speed 4.434545
58 2023-03-31 Walking Speed 3.468000
59 2023-04-01 Walking Speed 4.497000

So this is how to perform Fitness Data Analysis using Python. Fitness Watch Data Analysis is a crucial tool for businesses in the health and wellness domain. By analyzing user data from fitness wearables, companies can understand user behaviour, offer personalized solutions, and contribute to improving users’ overall health and well-being.¶

In [ ]: